COVID-19 Vaccine Distribution Reccommendation

northeast <- c("Connecticut", "Maine", "Massachusetts", "New Hampshire", "Rhode Island", "Vermont", "New Jersey", "New York", "Pennsylvania", "New York City")
midwest <- c("Illinois", "Indiana", "Michigan", "Ohio", "Wisconsin", "Iowa", "Kansas", "Minnesota", "Missouri", "Nebraska", "North Dakota", "South Dakota")
south <- c("Delaware", "Oklahoma", "Florida", "Tennessee", "Texas", "Kentucky", "Arkansas", "Louisiana", "Georgia", "Alabama", "Maryland", "North Carolina", "South Carolina", "Virginia", "District of Columbia", "Mississippi", "West Virginia")
west <- c("Arizona", "Colorado", "Idaho", "Montana", "Nevada", "New Mexico", "Utah", "Wyoming", "Alaska", "California", "Hawaii", "Oregon", "Washington")


SexAge$Region <- SexAge$State

for(i in seq(1, nrow(SexAge), 1)){
  if (SexAge$State[i] %in% northeast)
  {
    SexAge$Region[i] <- "northeast"
  }
  else if (SexAge$State[i] %in% south)
  {
    SexAge$Region[i] <- "south"
  }
  else if (SexAge$State[i] %in% midwest)
  {
    SexAge$Region[i] <- "midwest"
  }
  else if (SexAge$State[i] %in% west)
  {
    SexAge$Region[i] <- "west"
  }
}

RaceEthnicity$Region <- RaceEthnicity$State

for(i in seq(1, nrow(RaceEthnicity), 1)){
  if (RaceEthnicity$State[i] %in% northeast)
  {
    RaceEthnicity$Region[i] <- "northeast"
  }
  else if (RaceEthnicity$State[i] %in% south)
  {
    RaceEthnicity$Region[i] <- "south"
  }
  else if (RaceEthnicity$State[i] %in% midwest)
  {
    RaceEthnicity$Region[i] <- "midwest"
  }
  else if (RaceEthnicity$State[i] %in% west)
  {
    RaceEthnicity$Region[i] <- "west"
  }
}

Ethnicity and COVID-19 deaths

RaceEthnicity<- pivot_longer(RaceEthnicity, c(`Non.Hispanic.White`,`Non.Hispanic.Black.or.African.American`, `Non.Hispanic.American.Indian.or.Alaska.Native`,`Non.Hispanic.Asian`,`Non.Hispanic.Native.Hawaiian.or.Other.Pacific.Islander`,`Hispanic.or.Latino`,`Other`),names_to="Ethnicity",values_to="Proportion")

Marianna <- RaceEthnicity %>%
  filter(Indicator == "Distribution of COVID-19 deaths (%)" | Indicator == "Unweighted distribution of population (%)") %>%
  pivot_wider(names_from = "Indicator", values_from = "Proportion") %>%
  filter(Region != "United States")%>%
  group_by(Region, Ethnicity) %>%
  summarize(`Distribution of COVID deaths (%)` = mean(`Distribution of COVID-19 deaths (%)`, na.rm = TRUE), `Distribution of Pop (%)` = mean(`Unweighted distribution of population (%)`, na.rm = TRUE)) %>%
  pivot_longer(c("Distribution of COVID deaths (%)", "Distribution of Pop (%)"), names_to = "indicator", values_to="Proportion")
## `summarise()` regrouping output by 'Region' (override with `.groups` argument)
  p_1<-ggplot(Marianna, aes(x = Proportion, y = Ethnicity, fill = indicator)) + facet_wrap(~Region) + geom_bar(stat = "identity")
  p_1 + theme(legend.position = "bottom", legend.title = element_blank())

Gender and COVID-19 Deaths

#Create a new dataset that only includes COVID deaths variable

COVID_gender <- SexAge %>% select(Sex, State, `COVID-19 Deaths`, `Age group`)
COVID_gender
## # A tibble: 2,661 x 4
##    Sex       State         `COVID-19 Deaths` `Age group` 
##    <chr>     <chr>                     <dbl> <chr>       
##  1 All Sexes United States            261530 All Ages    
##  2 All Sexes United States                29 Under 1 year
##  3 All Sexes United States               133 0-17 years  
##  4 All Sexes United States                17 1-4 years   
##  5 All Sexes United States                46 5-14 years  
##  6 All Sexes United States               449 15-24 years 
##  7 All Sexes United States              1089 18-29 years 
##  8 All Sexes United States              1909 25-34 years 
##  9 All Sexes United States             11247 30-49 years 
## 10 All Sexes United States              4917 35-44 years 
## # … with 2,651 more rows
#BAsed on this table, as of December 9, 2020, there have been a total of 261,530 deaths caused by COVID-19. 

COVID_sex <- COVID_gender %>% filter(`Age group`== "All Ages", State=="United States")
COVID_sex
## # A tibble: 4 x 4
##   Sex       State         `COVID-19 Deaths` `Age group`
##   <chr>     <chr>                     <dbl> <chr>      
## 1 All Sexes United States            261530 All Ages   
## 2 Male      United States            141640 All Ages   
## 3 Female    United States            119883 All Ages   
## 4 Unknown   United States                 7 All Ages
#Create a bar plot to show the distribution of deaths by sex

graph <- plot_ly(COVID_sex, x=~Sex, y=~`COVID-19 Deaths`, type="bar", color=~Sex)
graph
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
COVID_gender <- SexAge %>% select(Sex, State, `COVID-19 Deaths`, `Age group`, Region)

COVID_sex <- COVID_gender %>% 
  filter(`Age group`== "All Ages") %>%
  filter(Region != "United States" & Region != "Puerto Rico")%>%
  group_by(Region, Sex) %>%
  summarize(deaths = sum(`COVID-19 Deaths`, na.rm = TRUE))
## `summarise()` regrouping output by 'Region' (override with `.groups` argument)
p_2 <- ggplot(COVID_sex, aes(x = COVID_sex$Sex, y = COVID_sex$deaths, fill = COVID_sex$Sex)) + geom_bar(stat = "identity") + facet_wrap(~Region) + ylab("Deaths") + xlab("Sex") + ggtitle("Relationship between Sex and Covid-19 deaths by region") + labs(fill = "Sex")
ggplotly(p_2)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Age and COVID-19 deaths

COVIDage <- Provisional_COVID.19_Death_Counts_by_Sex__Age__and_State

AgeGroup0 <- COVIDage %>%
  filter(State=="United States", Sex=="All Sexes",Age.group==c("0-17 years"))
    
AgeGroup1 <- COVIDage %>%
  filter(State=="United States", Sex=="All Sexes",Age.group==c("18-29 years","50-64 years"))
## Warning in Age.group == c("18-29 years", "50-64 years"): longer object length is
## not a multiple of shorter object length
AgeGroup2 <- COVIDage %>%
  filter(State=="United States", Sex=="All Sexes",Age.group==c("30-49 years","65-74 years"))
## Warning in Age.group == c("30-49 years", "65-74 years"): longer object length is
## not a multiple of shorter object length
AgeGroup3 <- COVIDage %>%
  filter(State=="United States", Sex=="All Sexes",Age.group==c("85 years and over"))

DeathvsAge <- bind_rows(AgeGroup0, AgeGroup1, AgeGroup2, AgeGroup3) %>% 
                arrange(desc(Age.group)) %>% 
                  select(-c(8:13))
DeathvsAge %>%
  ggplot(aes(x=Age.group,y=COVID.19.Deaths)) +
    geom_bar(stat="identity", col="red", fill="steelblue")

COVIDage <- SexAge %>%
  group_by(`Age group`, Region)%>%
  summarize(covidDeaths = sum(`COVID-19 Deaths`, na.rm = TRUE)) %>%
  filter(Region != "Puerto Rico" & Region != "United States")%>%
  filter(`Age group` != "Under 1 year" & `Age group` != "All Ages" & `Age group` != "1-4 years" & `Age group` != "5-14 years")
## `summarise()` regrouping output by 'Age group' (override with `.groups` argument)
ggplot(COVIDage, aes(x = `Age group`, y = covidDeaths)) + geom_bar(stat = "identity", col = "red", fill = "steelblue") + facet_wrap(~Region) + coord_flip() + ylab("Covid-19 Deaths")

Map of states with top 5 COVID-19 deaths

# Code used to produce map of states most impacted by COVID-19

states<-map_data("state")
top_5<- subset(states, region %in% c("texas", "california", "florida", "new jersey", "new york"))

ggplot(data = top_5) + 
  geom_polygon(aes(x = long, y = lat, group = group), fill = "palegreen", color = "black") + 
  coord_fixed(1.3)

COVID-19 deaths per state

# Code used to create graph of COVID-19 deaths by state
covid_totals<- COVID_TOTALS_lowercase%>%
  select(State, COVID.19.Deaths)
map_covid <- left_join(covid_totals, states, by= c("State"="region"))

ggplot(map_covid, aes(long, lat, group = group)) +
        geom_polygon(aes(fill = COVID.19.Deaths, color="yellow"),
                     colour = alpha("red", 1/2))  +
          scale_fill_gradient(low="blue", high="red",
                              breaks = c(2, 4, 10, 100, 1000, 10000),
                              trans = "log10")+
        geom_polygon(data = states, colour = "black", fill = NA) + 
        theme_void() +
  coord_fixed(1.2) 

Sex and COVID-19 deaths by State

# Code used to produce graph of COVID-19 deaths by sex in the 5 states with the most COVID-19 deaths
sex_age_state<- Provisional_COVID.19_Death_Counts_by_Sex__Age__and_State
graph_data_ggplot <- sex_age_state%>%
  filter(State==c("Texas", "California", "Florida", "New Jersey", "New York"), Sex== c("Male", "Female"))
## Warning in State == c("Texas", "California", "Florida", "New Jersey", "New
## York"): longer object length is not a multiple of shorter object length
## Warning in Sex == c("Male", "Female"): longer object length is not a multiple of
## shorter object length
graph_state_sex<- ggplot(graph_data_ggplot, aes(fill=Sex, y=COVID.19.Deaths, x=State))+
  geom_bar(position = "stack", stat = "identity")
graph_state_sex
## Warning: Removed 2 rows containing missing values (position_stack).

Age and COVID-19 deaths by State

# Code used to produce graph of COVID-19 deaths by age in the 5 states with the most COVID-19 deaths
sex_state <- sex_age_state%>%
  filter(State==c("Texas", "California", "Florida", "New Jersey", "New York"), Age.group!= "All Ages")
## Warning in State == c("Texas", "California", "Florida", "New Jersey", "New
## York"): longer object length is not a multiple of shorter object length
age_graph <- ggplot(sex_state, aes(fill=Age.group, y=COVID.19.Deaths, x=State))+
  geom_bar(position = "stack", stat = "identity")
age_graph
## Warning: Removed 5 rows containing missing values (position_stack).

Ethnicity and COVID-19 deaths by State

# Code used to produce graph of COVID-19 deaths by ethnicity in the 5 states with the most COVID-19 deaths
ethnicity_covid<-daata[c(5:208),-c(13)]

newest_ethnicity<- ethnicity_covid%>%
  filter(Indicator!= c("Count of COVID-19 deaths","Weighted distribution of population (%)"))

newest_ethnicitycovid<-pivot_longer(newest_ethnicity,c(`Non.Hispanic.White`,`Non.Hispanic.Black.or.African.American`, `Non.Hispanic.American.Indian.or.Alaska.Native`,`Non.Hispanic.Asian`,`Hispanic.or.Latino`,`Other`),names_to="Ethnicity",values_to="Proportion")

new_newest_ethnicitycovid<- newest_ethnicitycovid%>%
  filter(State==c("Texas", "California", "Florida", "New Jersey", "New York"))
## Warning in State == c("Texas", "California", "Florida", "New Jersey", "New
## York"): longer object length is not a multiple of shorter object length
race_graph <- ggplot(new_newest_ethnicitycovid, aes(fill=Ethnicity, y=Proportion, x=State))+
  geom_bar(position = "stack", stat = "identity")
race_graph